Single cell transcriptomes of human blastoids
2022-01-30
Yu, L., Wei, Y., Duan, J., Schmitz, D.A., Sakurai, M., Wang, L., Wang, K., Zhao, S., Hon, G.C., and Wu, J. (2021). Blastocyst-like structures generated from human pluripotent stem cells. Nature 591, 620–626.
- BioProject Accession: PRJNA632846
- GEO Accession: GSE150570
- GitHub repository: jlduan/Human_blastoid@GitHub
Rendered at 2022-01-30 14:51:40 America/Chicago.
Load required packages.
Sys.time()[1] "2022-01-30 14:51:41 CST"
[1] "America/Chicago"
Data preparation
Functions loading
source(
file = file.path(
SCRIPT_DIR,
"utilities.R"
)
)
load_matrix <- function(x) {
matrix_readcount_use <- scipy.sparse$load_npz(
file.path(x, "matrix_readcount.npz")
)
colnames(matrix_readcount_use) <- np$load(
file.path(x, "matrix_readcount_barcodes.npy")
)
rownames(matrix_readcount_use) <- np$load(
file.path(x, "matrix_readcount_features.npy")
)
return(matrix_readcount_use)
}Data loading
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Human_blastoid"Matrix
MATRIX_DIR <- list(
"github/data/matrices/LW36",
"github/data/matrices/LW49_LW50_LW51_LW52",
"github/data/matrices/LW58_LW59",
"github/data/matrices/LW60_LW61",
"raw/public/PRJEB11202/reformatted_matrix"
)
matrix_readcount_use <- purrr::map(MATRIX_DIR, \(x) {
load_matrix(file.path(PROJECT_DIR, x))
}) |>
purrr::reduce(cbind)
# clean up
rm(MATRIX_DIR)Embedding
Metadata
cell_metadata_PRJEB11202 <- read_delim(
file.path(
PROJECT_DIR,
"raw/public/PRJEB11202/",
"E-MTAB-3929.sdrf.tsv"
),
delim = "\t"
) |>
dplyr::select(
`Comment[ENA_SAMPLE]`,
`Comment[ENA_RUN]`,
`Characteristics[developmental stage]`,
`Characteristics[inferred lineage]`
) |>
dplyr::rename(
cell = `Comment[ENA_SAMPLE]`,
run = `Comment[ENA_RUN]`,
developmental_stage = `Characteristics[developmental stage]`,
lineage = `Characteristics[inferred lineage]`
) |>
dplyr::mutate(
developmental_stage = str_replace(
string = developmental_stage,
pattern = "embryonic day ",
replacement = "E"
),
lineage = case_when(
lineage == "epiblast" ~ "EPI",
lineage == "primitive endoderm" ~ "HYP",
lineage == "trophectoderm" ~ "TE",
lineage == "not applicable" ~ "Pre-lineage"
)
)
embedding <- embedding |>
dplyr::left_join(
cell_metadata_PRJEB11202
) |>
dplyr::mutate(
developmental_stage = factor(developmental_stage),
lineage = factor(lineage)
)Check memory usage.
walk(list(matrix_readcount_use, embedding), \(x) {
print(object.size(x), units = "auto", standard = "SI")
})496.5 MB
1.6 MB
Blastoids globally resemble blastocysts
Embedding visualization
EMBEDDING_TITLE_PREFIX <- "UMAP"
x_column <- "x_umap"
y_column <- "y_umap"
GEOM_POINT_SIZE <- 0.6
RASTERISED <- FALSEClustering & batch & UMI
p_embedding_louvain <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = embedding$louvain |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Cluster"),
color_labels = TRUE,
color_legend = FALSE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
p_embedding_batch <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = embedding$batch |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
p_embedding_UMI <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_POU5F1 <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding$cell])
[selected_feature, ] + 1
),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; ",
"{selected_feature |> stringr::str_remove(pattern = \"^E.+_\")}"
),
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE,
na_value = "grey80"
) +
theme_customized_embedding()embedding |>
dplyr::mutate(
num_umis = colSums(matrix_readcount_use[, cell]),
num_features = colSums(matrix_readcount_use[, cell] > 0),
) |>
dplyr::group_by(louvain) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
median_features = median(num_features)
) |>
gt::gt() |>
gt::data_color(
columns = c(median_umis),
colors = scales::col_numeric(
palette = c(
"green", "orange", "red"
),
domain = c(1000, 32000)
)
) |>
gt::summary_rows(
columns = c(louvain),
fns = list(
Count = ~ n()
),
decimals = 0
) |>
gt::summary_rows(
columns = c(median_umis:median_features),
fns = list(
Mean = ~ mean(.)
),
decimals = 0
) |>
gt::summary_rows(
columns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),
decimals = 0
) |>
gt::tab_header(
title = gt::md("**Blastoid Clustering**; Cluster")
)| Blastoid Clustering; Cluster | ||||
|---|---|---|---|---|
| louvain | num_cells | median_umis | median_features | |
| 0 | 1533 | 11534.0 | 3074.0 | |
| 1 | 1113 | 18092.0 | 3612.0 | |
| 2 | 1079 | 11308.0 | 2940.0 | |
| 3 | 996 | 14659.0 | 3220.5 | |
| 4 | 924 | 1108.5 | 627.0 | |
| 5 | 725 | 6915.0 | 1981.0 | |
| 6 | 710 | 9711.5 | 2158.0 | |
| 7 | 672 | 10544.0 | 2791.5 | |
| 8 | 649 | 11961.0 | 2905.0 | |
| 9 | 570 | 19201.0 | 3656.0 | |
| 10 | 410 | 23705.5 | 4208.5 | |
| 11 | 366 | 13910.5 | 3355.5 | |
| 12 | 365 | 16942.0 | 3437.0 | |
| 13 | 275 | 8010.0 | 2455.0 | |
| 14 | 249 | 31950.0 | 4954.0 | |
| 15 | 202 | 8217.0 | 2452.0 | |
| 16 | 138 | 10242.5 | 2852.5 | |
| 17 | 120 | 17426.0 | 3400.0 | |
| 18 | 86 | 18158.0 | 3853.5 | |
| Count | 19 | — | — | — |
| Mean | — | — | 13,873 | 3,049 |
| Sum | — | 11,182 | — | — |
embedding |>
dplyr::mutate(
num_umis = colSums(matrix_readcount_use[, cell]),
num_features = colSums(matrix_readcount_use[, cell] > 0),
) |>
dplyr::group_by(batch) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
median_features = median(num_features)
) |>
dplyr::mutate(
sample = dplyr::case_when(
batch == "PRJEB11202" ~ "Petropoulos et al., 2016",
batch == "LW60" ~ "Blastoid, D9; HT; 5i/L/A",
batch == "LW61" ~ "Blastoid, D9; HT; 5i/L/A"
)
) |>
dplyr::select(
sample, dplyr::everything()
) |>
gt::gt() |>
gt::data_color(
columns = c(median_umis),
colors = scales::col_numeric(
palette = c(
"green", "orange", "red"
),
domain = c(7000, 1600000)
)
) |>
gt::summary_rows(
columns = c(sample:batch),
fns = list(
Count = ~ n()
),
decimals = 0
) |>
gt::summary_rows(
columns = c(median_umis:median_features),
fns = list(
Mean = ~ mean(.)
),
decimals = 0
) |>
gt::summary_rows(
columns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),
decimals = 0
) |>
gt::tab_header(
title = gt::md("**Blastoid Clustering**; Batch")
)| Blastoid Clustering; Batch | |||||
|---|---|---|---|---|---|
| sample | batch | num_cells | median_umis | median_features | |
| Blastoid, D9; HT; 5i/L/A | LW60 | 4497 | 14421 | 3337 | |
| Blastoid, D9; HT; 5i/L/A | LW61 | 5156 | 7625 | 2185 | |
| Petropoulos et al., 2016 | PRJEB11202 | 1529 | 1551093 | 10305 | |
| Count | 3 | 3 | — | — | — |
| Mean | — | — | — | 524,380 | 5,276 |
| Sum | — | — | 11,182 | — | — |
purrr::reduce(list(
p_embedding_louvain,
p_embedding_batch,
p_embedding_UMI,
p_embedding_POU5F1
), `+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Developmental stages
Salmon: highlighted group of cells; Light grey: cells belonging to this dataset but not the highlighted group; Dark grey: cells belonging to other datasets.
purrr::map(levels(embedding$developmental_stage), \(x) {
plot_embedding(
data = embedding[, c(x_column, y_column)],
color = as.integer(embedding$developmental_stage == x) |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Petropoulos et al., 2016, {x}"),
color_legend = FALSE,
sort_values = TRUE,
geom_point_size = GEOM_POINT_SIZE,
) +
theme_customized_embedding() +
ggplot2::scale_color_manual(values = c("grey70", "salmon"))
}) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 3, byrow = TRUE) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Polishing
color_palette_cluster <- c(
"0" = "#8DD3C7",
"1" = "#9EDAE5FF",
"2" = "#BEBADA",
"3" = "#FB8072",
"4" = "#80B1D3",
"5" = "#FDB462",
"6" = "#B3DE69",
"7" = "#FCCDE5",
"8" = "#DC863B",
"9" = "#BC80BD",
"10" = "#11c638",
"11" = "#BCBD22FF",
"12" = "#17BECFFF",
"13" = "#AEC7E8FF",
"14" = "#EAD3C6",
"15" = "#98DF8AFF",
"16" = "#FF9896FF",
"17" = "#C5B0D5FF",
"18" = "#C49C94FF",
"19" = "#F7B6D2FF",
"20" = "#D33F6A",
"21" = "#8E063B",
"22" = "#023FA5"
)
cluster_labels <- embedding |>
dplyr::group_by(.data[["louvain"]]) |>
dplyr::summarise(
x = quantile(.data[[x_column]], 0.5),
y = quantile(.data[[y_column]], 0.5),
.groups = "drop"
) |>
as.data.frame()
cluster_labels[cluster_labels[["louvain"]] == 14, c("x", "y")] <- c(6.8, -1.8)
clusters_unidentified <- c(2, 3, 4, 5, 6, 7, 13, 15, 16)
cluster_labels <- cluster_labels |>
dplyr::mutate(
label = dplyr::case_when(
louvain %in% clusters_unidentified ~ paste0(
"U",
as.character(louvain)
),
TRUE ~ as.character(louvain)
)
)
# cluster
p_embedding_cluster <- embedding |>
arrange(desc(louvain)) |>
ggplot(
aes(
x = .data[[x_column]],
y = .data[[y_column]],
color = .data[["louvain"]] |> as.factor()
)
) +
geom_point(
size = 0.45,
alpha = 1,
stroke = 0,
shape = 16
) +
scale_color_manual(
values = color_palette_cluster
) +
theme_void() +
ggplot2::guides(color = "none") +
ggplot2::annotate(
"text",
family = "Arial",
x = cluster_labels[, "x"],
y = cluster_labels[, "y"],
label = cluster_labels[, "label"],
parse = TRUE,
size = 2,
color = c("black")
)
# lineage
p_embedding_lineage <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = embedding |>
dplyr::mutate(
value = factor(lineage,
levels = c("Blastoid", "TE", "HYP", "EPI", "Pre-lineage")
)
) |>
dplyr::pull(value),
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
geom_point_size = 0.45,
) +
theme_customized_embedding(void = TRUE) +
ggplot2::scale_color_manual(
name = NULL,
values = c("grey70", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
breaks = c("Blastoid", "EPI", "HYP", "TE", "Pre-lineage"),
labels = c(
"Blastoid cells", "Epiblast (EPI)", "Hypoblast (HYP)",
"Trophectoderm (TE)", "Pre-lineage"
),
na.value = "grey70"
)
# developmental stage
p_embedding_developmental_stage <- plot_embedding(
data = embedding[, c(x_column, y_column)],
color = embedding$developmental_stage,
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
geom_point_size = 0.45,
) +
theme_customized_embedding(void = TRUE) +
ggplot2::scale_color_manual(
name = NULL,
values = c(
"grey70",
ggthemes::tableau_color_pal("Tableau 10")(
length(unique(embedding$developmental_stage))
)
),
labels = c("Blastoid cells", "E3", "E4", "E5", "E6", "E7"),
na.value = "grey70"
)features_selected <- c(
"ENSG00000181449_SOX2",
"ENSG00000187498_COL4A1",
"ENSG00000179348_GATA2"
)
lineage_labels <- c(
"(Epiblast)",
"(Hypoblast)",
"(Trophectoderm)"
)
CB_POSITION <- c(0.875, 0.32)
x_label <- ggplot_build(p_embedding_POU5F1)$layout$panel_params[[1]][c("x.range")] |>
unlist() |>
quantile(0.575)
y_label <- ggplot_build(p_embedding_POU5F1)$layout$panel_params[[1]][c("y.range")] |>
unlist() |>
quantile(0.8)
p_embedding_SOX2_COL4A1_GATA2 <- purrr::map2(features_selected, lineage_labels, \(x, y) {
selected_feature <- x
values <- log10(calc_cpm(matrix_readcount_use)[x, embedding$cell] + 1)
values[embedding$batch == "PRJEB11202"] <- NA
plot_embedding(
data = embedding[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding$cell])
[selected_feature, ] + 1
),
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = 0.5,
na_value = "grey70"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2],
void = TRUE,
legend_key_size = c(1.5, 1.5)
) +
ggplot2::annotate(
geom = "text",
x = x_label,
y = y_label,
label = stringr::str_c(
x |> stringr::str_remove(pattern = "^E.+_"),
y,
sep = "\n"
),
family = "Arial",
color = "black",
size = 5 / ggplot2::.pt,
hjust = 0.5,
vjust = 0
# parse = TRUE
)
})c(
list(
p_embedding_cluster,
p_embedding_lineage,
p_embedding_developmental_stage
),
p_embedding_SOX2_COL4A1_GATA2
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2, byrow = FALSE) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Expression
Embedding
features_selected <- c(
"ENSG00000204531_POU5F1",
"ENSG00000203909_DPPA5",
"ENSG00000184344_GDF3",
"ENSG00000156574_NODAL",
"ENSG00000111704_NANOG",
"ENSG00000075388_FGF4",
#
"ENSG00000164736_SOX17",
"ENSG00000125798_FOXA2",
"ENSG00000136574_GATA4",
"ENSG00000141448_GATA6",
"ENSG00000134853_PDGFRA",
"ENSG00000115414_FN1",
#
"ENSG00000107485_GATA3",
"ENSG00000118777_ABCG2",
"ENSG00000165556_CDX2",
"ENSG00000137869_CYP19A1",
"ENSG00000172818_OVOL1",
"ENSG00000126353_CCR7"
)
purrr::map(features_selected, \(x) {
selected_feature <- x
values <- log10(calc_cpm(matrix_readcount_use)[x, embedding$cell] + 1)
values[embedding$batch == "PRJEB11202"] <- NA
plot_embedding(
data = embedding[, c(x_column, y_column)],
color = values,
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = 0.5,
na_value = "grey70"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2],
void = TRUE,
legend_key_size = c(1.5, 1.5)
) +
ggplot2::annotate(
geom = "text",
x = x_label,
y = y_label,
label = x |> stringr::str_remove(pattern = "^E.+_"),
family = "Arial",
color = "black",
size = 5 / ggplot2::.pt,
hjust = 0.5,
vjust = 0
# parse = TRUE
)
}) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 3, byrow = FALSE) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Lollipop
clusters_selected_lollipop <- c(
11,
18,
0, 1, 8, 9, 12, 17
)
cells_selected_lollipop <- purrr::map(clusters_selected_lollipop, \(x) {
embedding |>
filter(
louvain == x,
batch != "PRJEB11202"
) |>
pull(cell)
})
names(cells_selected_lollipop) <- clusters_selected_lollipop
features_selected <- c(
"ENSG00000181449_SOX2",
"ENSG00000156574_NODAL",
"ENSG00000111704_NANOG",
"ENSG00000147596_PRDM14",
"ENSG00000075388_FGF4",
#
"ENSG00000164736_SOX17",
"ENSG00000125798_FOXA2",
"ENSG00000136574_GATA4",
"ENSG00000141448_GATA6",
"ENSG00000115414_FN1",
"ENSG00000187498_COL4A1",
#
"ENSG00000179348_GATA2",
"ENSG00000107485_GATA3",
"ENSG00000118777_ABCG2",
#
"ENSG00000126353_CCR7",
"ENSG00000137869_CYP19A1",
"ENSG00000169550_MUC15",
"ENSG00000172818_OVOL1"
)
plot_lollipop(
cells = cells_selected_lollipop,
features = features_selected,
matrix_cpm = calc_cpm(matrix_readcount_use),
size_range_limits = c(0, 4),
dot_size = 3
) +
ggplot2::theme(
legend.box = "horizontal",
axis.text.x.top = ggplot2::element_text(
angle = 90, vjust = 0.5, hjust = 0
)
)Violin
features_selected <- c(
"ENSG00000204531_POU5F1",
"ENSG00000203909_DPPA5",
"ENSG00000125798_FOXA2",
"ENSG00000134853_PDGFRA",
"ENSG00000107485_GATA3",
"ENSG00000118777_ABCG2"
)
clusters_selected_violin <- c(
11,
18,
0, 1, 8, 9, 12, 17
)
# blastoid
cells_violin <- purrr::map(clusters_selected_violin, \(x) {
embedding |>
dplyr::filter(
louvain == x,
batch != "PRJEB11202"
) |>
dplyr::pull(cell)
})
names(cells_violin) <- clusters_selected_violin
p_violin_blastoid <- plot_violin(
cells = cells_violin,
features = features_selected,
matrix_cpm = calc_cpm(matrix_readcount_use),
x_range_breaks = NULL
) +
ggplot2::scale_fill_manual(
name = NULL,
values = color_palette_cluster
) +
ggplot2::scale_color_manual(
name = NULL,
values = color_palette_cluster
) +
ggplot2::labs(title = "Blastoids; 10x Genomics") +
theme_customized_violin(panel_border_color = "black") +
ggplot2::theme(
plot.title = ggplot2::element_text(
family = "Arial",
size = 8,
margin = ggplot2::margin(
t = 0, r = 0, b = 1, l = 0,
unit = "mm"
),
hjust = 0.5
)
)
# in vivo
cells_violin <- purrr::map(clusters_selected_violin, \(x) {
embedding |>
filter(
louvain == x,
batch == "PRJEB11202"
) |>
pull(cell)
})
names(cells_violin) <- clusters_selected_violin
# re-aligned
p_violin_PRJEB11202 <- plot_violin(
cells = cells_violin,
features = features_selected,
matrix_cpm = calc_cpm(matrix_readcount_use),
x_range_breaks = NULL,
y_title = NULL
) +
ggplot2::scale_fill_manual(
name = NULL,
values = color_palette_cluster
) +
ggplot2::scale_color_manual(
name = NULL,
values = color_palette_cluster
) +
ggplot2::labs(title = "Human pre-implantation embryos; Smart-Seq2") +
theme_customized_violin(panel_border_color = "black") +
ggplot2::theme(
plot.title = ggplot2::element_text(
family = "Arial",
size = 8,
margin = ggplot2::margin(
t = 0, r = 0, b = 1, l = 0,
unit = "mm"
),
hjust = 0.5
)
)
p_violin_PRJEB11202 <- p_violin_PRJEB11202 +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
p_violin_blastoid_dims <- patchwork::get_dim(p_violin_blastoid)
p_violin_PRJEB11202_aligned <- patchwork::set_dim(
p_violin_PRJEB11202,
p_violin_blastoid_dims
)gridExtra::grid.arrange(
p_violin_blastoid, p_violin_PRJEB11202_aligned,
ncol = 2,
clip = FALSE
)Cluster composition
clusters_selected <- c(
11,
18,
0, 1, 8, 9, 12, 17,
10, 14
)
cell_distribution_labels_right <- calc_group_composition(
data = embedding,
x = "louvain",
group = "lineage"
) |>
dplyr::filter(
is.na(lineage),
louvain %in% clusters_selected
) |>
dplyr::mutate(
louvain = factor(
louvain,
levels = rev(louvain)
)
)
cell_distribution_labels_left <- tibble::tribble(
~louvain, ~lineage, ~label_position, ~percentage,
11L, "EPI", 0.068306011, 0.136612022,
18L, "HYP", 0.20930232, 0.37209302,
0L, "TE", 0.036855838, 0.063274625,
1L, "TE", 0.20350404, 0.33513028,
8L, "TE", 0.095531587, 0.181818182,
9L, "TE", 0.19649123, 0.2877193,
12L, "TE", 0.073972603, 0.126027397,
17L, "TE", 0.09583335, 0.1916667,
10L, "Pre-lineage", 0.284146342, 0.412195122,
14L, "Pre-lineage", 0.24698795, 0.4939759
) |>
dplyr::mutate(
louvain = factor(
louvain,
levels = rev(clusters_selected)
)
)
calc_group_composition(
data = embedding,
x = "louvain",
group = "lineage"
) |>
dplyr::filter(louvain %in% clusters_selected) |>
dplyr::mutate(
louvain = factor(louvain, levels = rev(clusters_selected)),
lineage = dplyr::case_when(
is.na(lineage) ~ "Blastoid",
TRUE ~ as.character(lineage)
),
lineage = factor(lineage,
levels = rev(c("EPI", "HYP", "TE", "Pre-lineage", "Blastoid"))
)
) |>
plot_barplot(
x = "louvain",
y = "percentage",
z = "lineage"
) +
ggplot2::theme(
panel.grid.major.y = ggplot2::element_line(color = "grey80")
) +
ggplot2::coord_flip() +
ggplot2::scale_fill_manual(
name = NULL,
values = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
breaks = c("Blastoid", "EPI", "HYP", "TE", "Pre-lineage"),
labels = c(
"Blastoid cells", "Epiblast (EPI)", "Hypoblast (HYP)",
"Trophectoderm (TE)", "Pre-lineage"
)
) +
ggplot2::geom_text(
ggplot2::aes(
y = label_position,
x = louvain,
label = scales::percent(percentage, accuracy = 1L),
group = NULL
),
size = 1.8,
family = "Arial",
color = "black",
data = cell_distribution_labels_left,
hjust = 0.5
) +
ggplot2::geom_text(
ggplot2::aes(
y = 0.825,
x = louvain,
label = scales::percent(percentage, accuracy = 1L),
group = NULL
),
size = 1.8,
family = "Arial",
color = "black",
data = cell_distribution_labels_right,
hjust = 0
)Heatmap construction
# prepare features; cells; heatmap matrix;
features_heatmap <- c(
"ENSG00000204531_POU5F1",
"ENSG00000184344_GDF3",
"ENSG00000203909_DPPA5",
"ENSG00000181449_SOX2",
"ENSG00000111704_NANOG",
"ENSG00000241186_TDGF1",
"ENSG00000156574_NODAL",
"ENSG00000254339_AC064802.1",
"ENSG00000283567_C19orf85",
"ENSG00000151650_VENTX",
"ENSG00000163032_VSNL1",
"ENSG00000159231_CBR3",
"ENSG00000145423_SFRP2",
"ENSG00000006468_ETV1",
"ENSG00000147596_PRDM14",
#
"ENSG00000136574_GATA4",
"ENSG00000164266_SPINK1",
"ENSG00000158966_CACHD1",
"ENSG00000164093_PITX2",
"ENSG00000087303_NID2",
"ENSG00000174358_SLC6A19",
"ENSG00000134962_KLB",
"ENSG00000167780_SOAT2",
"ENSG00000110245_APOC3",
"ENSG00000170558_CDH2",
"ENSG00000125848_FLRT3",
"ENSG00000129538_RNASE1",
"ENSG00000100079_LGALS2",
"ENSG00000134853_PDGFRA",
"ENSG00000164736_SOX17",
"ENSG00000017427_IGF1",
"ENSG00000275410_HNF1B",
"ENSG00000118137_APOA1",
"ENSG00000198336_MYL4",
"ENSG00000171557_FGG",
"ENSG00000125798_FOXA2",
"ENSG00000146374_RSPO3",
"ENSG00000115414_FN1",
"ENSG00000164292_RHOBTB3",
"ENSG00000141448_GATA6",
"ENSG00000187498_COL4A1",
#
"ENSG00000196549_MME",
"ENSG00000138814_PPP3CA",
"ENSG00000082074_FYB1",
"ENSG00000123191_ATP7B",
"ENSG00000175318_GRAMD2A",
"ENSG00000100593_ISM2",
"ENSG00000265107_GJA5",
"ENSG00000226887_ERVMER34-1",
"ENSG00000183734_ASCL2",
"ENSG00000143850_PLEKHA6",
"ENSG00000103534_TMC5",
"ENSG00000132470_ITGB4",
"ENSG00000180999_C1orf105",
"ENSG00000109610_SOD3",
"ENSG00000143369_ECM1",
"ENSG00000204632_HLA-G",
"ENSG00000113594_LIFR",
"ENSG00000168394_TAP1",
"ENSG00000183287_CCBE1",
"ENSG00000079393_DUSP13",
"ENSG00000121769_FABP3",
"ENSG00000182985_CADM1",
"ENSG00000164692_COL1A2",
"ENSG00000112655_PTK7",
"ENSG00000134873_CLDN10",
"ENSG00000108375_RNF43",
"ENSG00000254726_MEX3A",
"ENSG00000164120_HPGD",
"ENSG00000081026_MAGI3",
"ENSG00000120738_EGR1",
"ENSG00000108960_MMD",
"ENSG00000112137_PHACTR1",
"ENSG00000166450_PRTG",
"ENSG00000164099_PRSS12",
"ENSG00000143320_CRABP2",
"ENSG00000145681_HAPLN1",
"ENSG00000113196_HAND1",
"ENSG00000198300_PEG3",
"ENSG00000101144_BMP7",
"ENSG00000174498_IGDCC3",
"ENSG00000132005_RFX1",
"ENSG00000099814_CEP170B",
"ENSG00000101986_ABCD1",
"ENSG00000144648_ACKR2",
"ENSG00000074410_CA12",
"ENSG00000125740_FOSB",
"ENSG00000183979_NPB",
"ENSG00000214049_UCA1",
"ENSG00000153071_DAB2",
"ENSG00000126353_CCR7",
"ENSG00000105880_DLX5",
"ENSG00000124749_COL21A1",
"ENSG00000112041_TULP1",
"ENSG00000120211_INSL4",
"ENSG00000223850_MYCNUT",
"ENSG00000164707_SLC13A4",
"ENSG00000173157_ADAMTS20",
"ENSG00000196482_ESRRG",
"ENSG00000180875_GREM2",
"ENSG00000170255_MRGPRX1",
"ENSG00000267943_AC010328.1",
"ENSG00000172818_OVOL1",
"ENSG00000137270_GCM1",
"ENSG00000137869_CYP19A1",
"ENSG00000135678_CPM",
"ENSG00000171476_HOPX",
"ENSG00000249861_LGALS16",
"ENSG00000108244_KRT23",
"ENSG00000169550_MUC15",
"ENSG00000260034_LCMT1-AS2",
"ENSG00000269526_ERVV-1",
"ENSG00000028137_TNFRSF1B",
"ENSG00000124731_TREM1",
"ENSG00000072422_RHOBTB1",
"ENSG00000185215_TNFAIP2",
"ENSG00000164877_MICALL2",
"ENSG00000244476_ERVFRD-1",
"ENSG00000280109_PLAC4",
"ENSG00000111057_KRT18",
"ENSG00000170421_KRT8",
"ENSG00000169583_CLIC3",
"ENSG00000107485_GATA3",
"ENSG00000179348_GATA2",
"ENSG00000187186_AL162231.1",
"ENSG00000176155_CCDC57",
"ENSG00000133243_BTBD2",
"ENSG00000179364_PACS2",
"ENSG00000237651_C2orf74",
"ENSG00000125726_CD70",
"ENSG00000156587_UBE2L6",
"ENSG00000182165_TP53TG1",
"ENSG00000163017_ACTG2",
"ENSG00000143632_ACTA1",
"ENSG00000119632_IFI27L2",
"ENSG00000185847_LINC01405",
"ENSG00000116661_FBXO2",
"ENSG00000196878_LAMB3",
"ENSG00000122861_PLAU",
"ENSG00000142227_EMP3",
"ENSG00000011422_PLAUR",
"ENSG00000100985_MMP9",
"ENSG00000026508_CD44",
"ENSG00000104368_PLAT",
"ENSG00000164171_ITGA2",
"ENSG00000101198_NKAIN4",
"ENSG00000116774_OLFML3",
"ENSG00000125398_SOX9",
"ENSG00000124225_PMEPA1",
"ENSG00000118785_SPP1",
"ENSG00000163453_IGFBP7",
"ENSG00000182871_COL18A1",
"ENSG00000110925_CSRNP2",
"ENSG00000165886_UBTD1",
"ENSG00000115073_ACTR1B",
"ENSG00000256288_AC022075.3",
"ENSG00000228106_AL392172.1",
"ENSG00000163577_EIF5A2"
)
# heatmap matrix
clusters_selected_heatmap <- c(
11,
18,
0, 1, 8, 9, 12, 17
)
matrix_heatmap <- calc_cpm(matrix_readcount_use)[
features_heatmap,
embedding |>
dplyr::filter(
louvain %in% clusters_selected_heatmap,
batch != "PRJEB11202"
) |>
dplyr::pull(cell)
]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]# sample cells
cells_heatmap_sampled <- purrr::map(clusters_selected_heatmap, \(x) {
cells_in_group <- embedding |>
filter(
louvain == x,
batch != "PRJEB11202"
) |>
pull(cell)
cat(length(cells_in_group), "\n")
if (length(cells_in_group) >= 100) {
c <- sample(cells_in_group, 100)
} else {
c <- cells_in_group
}
return(c)
})
## 312
## 51
## 1428
## 682
## 528
## 376
## 310
## 97
names(cells_heatmap_sampled) <- clusters_selected_heatmap
# cells
clusters_EPI <- 11
clusters_HYP <- 18
clusters_TE <- c(0, 1, 8, 9, 12, 17)
cells_heatmap_EPI <- embedding |>
dplyr::filter(
batch != "PRJEB11202",
louvain %in% clusters_EPI,
cell %in% unlist(cells_heatmap_sampled)
) |>
dplyr::pull(cell)
cells_heatmap_HYP <- embedding |>
dplyr::filter(
batch != "PRJEB11202",
louvain %in% clusters_HYP,
cell %in% unlist(cells_heatmap_sampled)
) |>
dplyr::pull(cell)
cells_heatmap_TE <- cells_heatmap_sampled[as.character(clusters_TE)] |>
unlist()
purrr::map_int(
list(cells_heatmap_EPI, cells_heatmap_HYP, cells_heatmap_TE),
length
)
## [1] 100 51 597# features to mark
features_to_mark_left <- c(
"ENSG00000184344_GDF3",
"ENSG00000241186_TDGF1",
"ENSG00000156574_NODAL",
"ENSG00000066468_FGFR2",
"ENSG00000111704_NANOG",
"ENSG00000204531_POU5F1",
"ENSG00000203909_DPPA5",
"ENSG00000181449_SOX2",
"ENSG00000186103_ARGFX",
"ENSG00000147596_PRDM14",
"ENSG00000171872_KLF17",
"ENSG00000086548_CEACAM6",
"ENSG00000166073_GPR176",
"ENSG00000125740_FOSB",
"ENSG00000105880_DLX5",
"ENSG00000169583_CLIC3",
"ENSG00000244476_ERVFRD-1",
"ENSG00000072422_RHOBTB1",
"ENSG00000137869_CYP19A1",
"ENSG00000169550_MUC15",
"ENSG00000108244_KRT23",
"ENSG00000260034_LCMT1-AS2"
)
features_to_mark_right <- c(
"ENSG00000187498_COL4A1",
"ENSG00000136574_GATA4",
"ENSG00000115414_FN1",
"ENSG00000125798_FOXA2",
"ENSG00000164736_SOX17",
"ENSG00000141448_GATA6",
"ENSG00000275410_HNF1B",
"ENSG00000153707_PTPRD",
"ENSG00000101441_CST4",
"ENSG00000146374_RSPO3",
"ENSG00000111057_KRT18",
"ENSG00000170421_KRT8",
"ENSG00000153071_DAB2",
"ENSG00000107485_GATA3",
"ENSG00000179348_GATA2",
"ENSG00000144648_ACKR2",
"ENSG00000126353_CCR7",
"ENSG00000137270_GCM1",
"ENSG00000180875_GREM2",
"ENSG00000172818_OVOL1",
"ENSG00000265107_GJA5",
"ENSG00000132470_ITGB4",
"ENSG00000074410_CA12",
"ENSG00000124749_COL21A1"
)
#
features_to_mark_left <- features_to_mark_left[
features_to_mark_left %in% rownames(matrix_heatmap)
]
ha_left <- ComplexHeatmap::rowAnnotation(
foo = ComplexHeatmap::anno_mark(
at = which(rownames(matrix_heatmap) %in% features_to_mark_left),
labels = features_to_mark_left |>
stringr::str_remove(pattern = "E.+_"),
which = "row",
side = "left",
lines_gp = grid::gpar(col = "grey50"),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
)
)
)
#
features_to_mark_right <- features_to_mark_right[
features_to_mark_right %in% rownames(matrix_heatmap)
]
ha_right <- ComplexHeatmap::rowAnnotation(
foo = ComplexHeatmap::anno_mark(
at = which(rownames(matrix_heatmap) %in% features_to_mark_right),
labels = features_to_mark_right |>
stringr::str_remove(pattern = "E.+_"),
which = "row",
side = "right",
lines_gp = grid::gpar(col = "grey50"),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
)
)
)RASTERISED <- FALSE# EPI
anno_labels_tbl_EPI <- table(
ha_cluster[ha_cluster %in% clusters_EPI]
)[as.character(clusters_EPI)] |>
tibble::as_tibble() |>
dplyr::mutate(
cum_sum = cumsum(value),
position = cum_sum - value / 2
)
anno_labels_cluster_EPI <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_EPI]))
if (nchar(as.character(clusters_EPI)) > 1) {
cluster_label <- strsplit(as.character(clusters_EPI), "")[[1]]
anno_labels_cluster_EPI[anno_labels_tbl_EPI$position - 5] <- cluster_label[1]
anno_labels_cluster_EPI[anno_labels_tbl_EPI$position + 5] <- cluster_label[2]
} else {
anno_labels_cluster_EPI[anno_labels_tbl_EPI$position] <- as.character(clusters_EPI)
}
ha_column_EPI <- ComplexHeatmap::HeatmapAnnotation(
#
lineage = ComplexHeatmap::anno_simple(
ha_lineage[ha_lineage == "EPI"],
# pch = anno_labels_cluster,
col = setNames(
object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
cluster = ComplexHeatmap::anno_simple(
ha_cluster[ha_cluster %in% clusters_EPI],
pch = anno_labels_cluster_EPI,
col = color_palette_cluster[as.character(clusters_selected_heatmap)],
which = "column",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
show_annotation_name = TRUE,
annotation_label = c(
"Lineage",
"Cluster"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
ht_EPI <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap[, cells_heatmap_EPI] |> as.matrix(),
rect_gp = grid::gpar(col = NA, lwd = 0),
# col = viridis::plasma(n = 10),
# col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
col = col_fun,
#
# row_title = character(0),
# row_title_side = c("left", "right"),
row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
row_title_rot = 0,
# column_title = character(0),
# column_title_side = c("top", "bottom"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = 0,
#
cluster_rows = FALSE,
show_row_dend = FALSE,
cluster_columns = FALSE,
show_column_dend = FALSE,
#
show_row_names = FALSE,
# row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# row_names_rot = 0,
# row_names_centered = FALSE,
show_column_names = FALSE,
# column_labels = colnames(matrix),
# column_names_side = c("top"),
# column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# column_names_rot = 90,
# column_names_centered = FALSE,
#
top_annotation = ha_column_EPI,
bottom_annotation = NULL,
left_annotation = ha_left,
right_annotation = NULL,
#
# row_split = table_s2_sheet4$ClusterID,
# row_gap = unit(0.3, "mm"),
column_split = factor(
ha_lineage[ha_lineage == "EPI"],
levels = unique(ha_lineage[ha_lineage == "EPI"])
),
column_gap = unit(0, "mm"),
#
show_heatmap_legend = FALSE,
heatmap_legend_param = list(
# title = expression(paste("Log"[10], " (CPM + 1)")),
title = "Expr",
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
legend_height = unit(15, "mm"),
legend_width = unit(5, "mm")
),
#
use_raster = RASTERISED,
#
width = unit(8, "mm")
)
# ComplexHeatmap::draw(ht_EPI)# HYP
anno_labels_tbl_HYP <- table(
ha_cluster[ha_cluster %in% clusters_HYP]
)[as.character(clusters_HYP)] |>
as_tibble() |>
mutate(
cum_sum = cumsum(value),
position = cum_sum - value / 2
)
anno_labels_cluster_HYP <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_HYP]))
if (nchar(as.character(clusters_HYP)) > 1) {
cluster_label <- strsplit(as.character(clusters_HYP), "")[[1]]
anno_labels_cluster_HYP[anno_labels_tbl_HYP$position - 5] <- cluster_label[1]
anno_labels_cluster_HYP[anno_labels_tbl_HYP$position + 5] <- cluster_label[2]
} else {
anno_labels_cluster_HYP[anno_labels_tbl_HYP$position] <- as.character(clusters_HYP)
}
ha_column_HYP <- ComplexHeatmap::HeatmapAnnotation(
#
lineage = ComplexHeatmap::anno_simple(
ha_lineage[ha_lineage == "HYP"],
# pch = anno_labels_cluster,
col = setNames(
object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
cluster = ComplexHeatmap::anno_simple(
ha_cluster[ha_cluster %in% clusters_HYP],
pch = anno_labels_cluster_HYP,
col = color_palette_cluster[as.character(clusters_selected_heatmap)],
which = "column",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
show_annotation_name = FALSE,
annotation_label = c(
"Lineage",
"Cluster"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
ht_HYP <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap[, cells_heatmap_HYP] |> as.matrix(),
rect_gp = grid::gpar(col = NA, lwd = 0),
# col = viridis::plasma(n = 10),
# col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
col = col_fun,
#
# row_title = character(0),
# row_title_side = c("left", "right"),
row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
row_title_rot = 0,
# column_title = character(0),
# column_title_side = c("top", "bottom"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = 0,
#
cluster_rows = FALSE,
show_row_dend = FALSE,
cluster_columns = FALSE,
show_column_dend = FALSE,
#
show_row_names = FALSE,
# row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# row_names_rot = 0,
# row_names_centered = FALSE,
show_column_names = FALSE,
# column_labels = colnames(matrix),
# column_names_side = c("top"),
# column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# column_names_rot = 90,
# column_names_centered = FALSE,
#
top_annotation = ha_column_HYP,
bottom_annotation = NULL,
left_annotation = NULL,
right_annotation = NULL,
#
# row_split = table_s2_sheet4$ClusterID,
# row_gap = unit(0.3, "mm"),
column_split = factor(
ha_lineage[ha_lineage == "HYP"],
levels = unique(ha_lineage[ha_lineage == "HYP"])
),
column_gap = unit(0, "mm"),
#
show_heatmap_legend = FALSE,
heatmap_legend_param = list(
# title = expression(paste("Log"[10], " (CPM + 1)")),
title = "Expr",
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
legend_height = unit(15, "mm"),
legend_width = unit(5, "mm")
),
#
use_raster = RASTERISED,
#
width = unit(5, "mm")
)
# ComplexHeatmap::draw(ht_HYP)# TE
anno_labels_tbl_TE <- table(
ha_cluster[ha_cluster %in% clusters_TE]
)[as.character(clusters_TE)] |>
tibble::enframe() |>
dplyr::mutate(
cum_sum = cumsum(value),
position = cum_sum - value / 2
)
anno_labels_tbl_TE <- purrr::map(anno_labels_tbl_TE$name, \(x) {
# works but ugly
a <- anno_labels_tbl_TE |>
filter(name == x)
if (nchar(a$name) > 1) {
cluster_label <- strsplit(as.character(a$name), "")[[1]]
a <- rbind(a, a)
a[1, 1] <- cluster_label[1]
a[2, 1] <- cluster_label[2]
a <- a |> as.data.frame()
a[1, 4] <- a[1, 4] - 5
a[2, 4] <- a[2, 4] + 5
}
return(a)
}) |>
purrr::reduce(rbind)
anno_labels_cluster_TE <- rep(NA, length(ha_cluster[ha_cluster %in% clusters_TE]))
anno_labels_cluster_TE[anno_labels_tbl_TE$position] <- anno_labels_tbl_TE$name
ha_column_TE <- ComplexHeatmap::HeatmapAnnotation(
#
lineage = ComplexHeatmap::anno_simple(
ha_lineage[ha_lineage == "TE"],
# pch = anno_labels_cluster,
col = setNames(
object = c("grey85", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
nm = c("Blastoid", "EPI", "HYP", "TE", "n/a")
),
which = "column",
pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
cluster = ComplexHeatmap::anno_simple(
ha_cluster[ha_cluster %in% clusters_TE],
pch = anno_labels_cluster_TE,
col = color_palette_cluster[as.character(clusters_selected_heatmap)],
which = "column",
# pt_size = unit(2, "mm"),
pt_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
simple_anno_size = unit(2, "mm")
),
#
show_annotation_name = FALSE,
annotation_label = c(
"Lineage",
"Cluster"
),
annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
annotation_name_side = "left"
)
ht_TE <- ComplexHeatmap::Heatmap(
matrix = matrix_heatmap[, cells_heatmap_TE] |> as.matrix(),
rect_gp = grid::gpar(col = NA, lwd = 0),
# col = viridis::plasma(n = 10),
# col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
col = col_fun,
#
# row_title = character(0),
# row_title_side = c("left", "right"),
row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
row_title_rot = 0,
# column_title = character(0),
# column_title_side = c("top", "bottom"),
column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
column_title_rot = 0,
#
cluster_rows = FALSE,
show_row_dend = FALSE,
cluster_columns = FALSE,
show_column_dend = FALSE,
#
show_row_names = FALSE,
# row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
# row_names_side = c("left"),
# row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# row_names_rot = 0,
# row_names_centered = FALSE,
show_column_names = FALSE,
# column_labels = colnames(matrix),
# column_names_side = c("top"),
# column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
# column_names_rot = 90,
# column_names_centered = FALSE,
#
top_annotation = ha_column_TE,
bottom_annotation = NULL,
left_annotation = NULL,
right_annotation = ha_right,
#
# row_split = table_s2_sheet4$ClusterID,
# row_gap = unit(0.3, "mm"),
column_split = factor(
ha_lineage[ha_lineage == "TE"],
levels = unique(ha_lineage[ha_lineage == "TE"])
),
column_gap = unit(0, "mm"),
#
show_heatmap_legend = FALSE,
heatmap_legend_param = list(
# title = expression(paste("Log"[10], " (CPM + 1)")),
title = "Expr",
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
legend_direction = "vertical",
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
),
legend_height = unit(15, "mm"),
legend_width = unit(5, "mm")
),
#
use_raster = RASTERISED,
#
width = unit(50, "mm")
)# legend
lgd_colorbar <- ComplexHeatmap::Legend(
col_fun = col_fun,
title = "Expr",
grid_height = unit(1, "mm"),
grid_width = unit(2, "mm"),
legend_height = unit(10, "mm"),
legend_width = unit(2, "mm"),
title_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 6
),
labels_gp = grid::gpar(
fontfamily = "Arial",
fontsize = 5
)
)
pd <- ComplexHeatmap::packLegend(
# lgd_cluster,
# lgd_lineage,
lgd_colorbar,
# gap = unit(8, "mm"),
direction = "vertical"
)Gene Ontology enrichment
enriched_go <- tibble::tribble(
~category, ~rank, ~go_id, ~term, ~p_value, ~p_value_log,
"EPI", 1L, "GO:0019827", "stem cell population maintenance", 9e-07, 6.045757491,
"EPI", 2L, "GO:0048368", "lateral mesoderm development", 0.00053, 3.27572413,
"EPI", 3L, "GO:0009790", "embryo development", 0.00088, 3.055517328,
"HYP", 1L, "GO:0048598", "embryonic morphogenesis", 3.9e-07, 6.408935393,
"HYP", 2L, "GO:0007369", "gastrulation", 1.8e-05, 4.744727495,
"HYP", 3L, "GO:0007492", "endoderm development", 0.00284, 2.54668166,
"TE", 1L, "GO:0001890", "placenta development", 6e-11, 10.22184875,
"TE", 2L, "GO:0001892", "embryonic placenta development", 2e-08, 7.698970004,
"TE", 3L, "GO:0048513", "animal organ development", 2.7e-07, 6.568636236
)
enriched_go |>
dplyr::mutate(
category = factor(
category,
levels = c("EPI", "HYP", "TE") # |> rev()
),
rank = factor(
rank,
levels = c(3, 2, 1)
)
) |>
plot_barplot_go_enrichment(
x = p_value_log,
y = rank,
z = category
) +
ggplot2::facet_wrap(
~category,
nrow = 1,
strip.position = "left",
scales = "free_x",
labeller = ggplot2::labeller(
category = c("EPI" = "EPI", "HYP" = "HYP", "TE" = "TE")
)
) +
ggplot2::geom_text(
ggplot2::aes(
x = 0,
label = term,
group = NULL
),
size = 2.2,
family = "Arial",
color = "black",
data = enriched_go |>
dplyr::mutate(
category = factor(
category,
levels = c("EPI", "HYP", "TE")
),
rank = factor(
rank,
levels = c(3, 2, 1)
),
term = paste(" ", term)
),
hjust = 0
) +
ggplot2::scale_fill_manual(
values = c("#8ace7e", "#ff684c", "#9467bd")
)High concordance of blastoid replicates
Data loading
MATRIX_DIR <- list(
"github/data/matrices/LW36",
"github/data/matrices/LW49_LW50_LW51_LW52",
"github/data/matrices/LW58_LW59",
"github/data/matrices/LW60_LW61",
"raw/public/PRJEB11202/reformatted_matrix"
)
matrix_readcount_use <- purrr::map(MATRIX_DIR, \(x) {
load_matrix(file.path(PROJECT_DIR, x))
}) |>
purrr::reduce(cbind)
matrix_readcount_use <- matrix_readcount_use[, embedding_replicate$cell]Embedding visualization
EMBEDDING_TITLE_PREFIX <- "FIt-SNE"
x_column <- "x_fitsne"
y_column <- "y_fitsne"Clustering & batch & UMI
p_embedding_replicate_louvain <- plot_embedding(
data = embedding_replicate[, c(x_column, y_column)],
color = embedding_replicate$louvain |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Cluster"),
color_labels = TRUE,
color_legend = FALSE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
p_embedding_replicate_batch <- plot_embedding(
data = embedding_replicate[, c(x_column, y_column)],
color = embedding_replicate$batch |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
CB_POSITION <- c(0.875, 0.3)
p_embedding_replicate_UMI <- plot_embedding(
data = embedding_replicate[, c(x_column, y_column)],
color = log10(
Matrix::colSums(matrix_readcount_use[, embedding_replicate$cell])
),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2]
)embedding_replicate |>
dplyr::mutate(
num_umis = colSums(matrix_readcount_use[, cell]),
num_features = colSums(matrix_readcount_use[, cell] > 0),
) |>
dplyr::group_by(batch) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
median_features = median(num_features)
) |>
dplyr::mutate(
sample = dplyr::case_when(
batch == "LW36" ~ "Blastoid, D8; TH; 5i/L/A",
batch == "LW60" ~ "Blastoid, D9; HT; 5i/L/A",
batch == "LW61" ~ "Blastoid, D9; HT; PXGL"
)
) |>
dplyr::select(
sample, dplyr::everything()
) |>
gt::gt() |>
gt::data_color(
columns = c(median_umis),
colors = scales::col_numeric(
palette = c(
"green", "orange", "red"
),
domain = c(7500, 32000)
)
) |>
gt::summary_rows(
columns = c(sample, batch),
fns = list(
Count = ~ n()
),
decimals = 0
) |>
gt::summary_rows(
columns = c(median_umis:median_features),
fns = list(
Mean = ~ mean(.)
),
decimals = 0
) |>
gt::summary_rows(
columns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),
decimals = 0
) |>
gt::tab_header(
title = gt::md("**Clustering of TH- and HT-Blastoids**; Batch")
)| Clustering of TH- and HT-Blastoids; Batch | |||||
|---|---|---|---|---|---|
| sample | batch | num_cells | median_umis | median_features | |
| Blastoid, D8; TH; 5i/L/A | LW36 | 6854 | 31193 | 5014 | |
| Blastoid, D9; HT; 5i/L/A | LW60 | 4497 | 14421 | 3337 | |
| Blastoid, D9; HT; PXGL | LW61 | 5156 | 7625 | 2185 | |
| Count | 3 | 3 | — | — | — |
| Mean | — | — | — | 17,746 | 3,512 |
| Sum | — | — | 16,507 | — | — |
purrr::reduce(
list(
p_embedding_replicate_louvain,
p_embedding_replicate_batch,
p_embedding_replicate_UMI
), `+`
) +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Cluster composition
calc_group_composition(
data = embedding_replicate,
x = "louvain",
group = "batch"
) |>
dplyr::mutate(
louvain = as.factor(louvain)
) |>
plot_barplot(
x = "louvain",
y = "percentage",
z = "batch"
) +
ggplot2::guides(fill = ggplot2::guide_legend(direction = "horizontal")) +
ggplot2::theme(legend.position = "bottom")Blastoids derived stem cell lines
Data loading
MATRIX_DIR <- list(
"github/data/matrices/LW36",
"github/data/matrices/LW49_LW50_LW51_LW52",
"github/data/matrices/LW58_LW59",
"github/data/matrices/LW60_LW61",
"raw/public/PRJEB11202/reformatted_matrix"
)
matrix_readcount_use <- purrr::map(MATRIX_DIR, \(x) {
load_matrix(file.path(PROJECT_DIR, x))
}) |>
purrr::reduce(cbind)
matrix_readcount_use <- matrix_readcount_use[, embedding_stem$cell]Embedding visualization
EMBEDDING_TITLE_PREFIX <- "FIt-SNE"
x_column <- "x_fitsne"
y_column <- "y_fitsne"Clustering & batch & UMI
p_embedding_stem_batch <- plot_embedding(
data = embedding_stem[, c(x_column, y_column)],
color = embedding_stem$batch |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
CB_POSITION <- c(0.875, 0.3)
p_embedding_stem_UMI <- plot_embedding(
data = embedding_stem[, c(x_column, y_column)],
color = log10(Matrix::colSums(matrix_readcount_use[, embedding_stem$cell])),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2]
)
selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_stem_POU5F1 <- plot_embedding(
data = embedding_stem[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding_stem$cell])
[selected_feature, ] + 1
),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; ",
"{selected_feature |> stringr::str_remove(pattern = \"^E.+_\")}"
),
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE,
na_value = "grey80"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2]
)embedding_stem |>
dplyr::mutate(
num_umis = colSums(matrix_readcount_use[, cell]),
num_features = colSums(matrix_readcount_use[, cell] > 0),
) |>
dplyr::group_by(batch) |>
dplyr::summarise(
num_cells = n(),
median_umis = median(num_umis),
median_features = median(num_features)
) |>
dplyr::mutate(
sample = dplyr::case_when(
batch == "LW49" ~ "Naïve WIRB3; 5i/L/A",
batch == "LW50" ~ "Blastoid naïve ES cells; 5i/L/A",
batch == "LW51" ~ "Blastoid nEND; NACL",
batch == "LW52" ~ "Blastoid TSCs; TSM"
)
) |>
dplyr::select(
sample, dplyr::everything()
) |>
gt::gt() |>
gt::data_color(
columns = c(median_umis),
colors = scales::col_numeric(
palette = c(
"green", "orange", "red"
),
domain = c(8500, 20000)
)
) |>
gt::summary_rows(
columns = c(sample, batch),
fns = list(
Count = ~ n()
),
decimals = 0
) |>
gt::summary_rows(
columns = c(median_umis:median_features),
fns = list(
Mean = ~ mean(.)
),
decimals = 0
) |>
gt::summary_rows(
columns = c(num_cells),
fns = list(
Sum = ~ sum(.)
),
decimals = 0
) |>
gt::tab_header(
title = gt::md("**Clustering of Blastoid Derived Cells**; Batch")
)| Clustering of Blastoid Derived Cells; Batch | |||||
|---|---|---|---|---|---|
| sample | batch | num_cells | median_umis | median_features | |
| Naïve WIRB3; 5i/L/A | LW49 | 1638 | 18363.0 | 4025.5 | |
| Blastoid naïve ES cells; 5i/L/A | LW50 | 2663 | 9022.0 | 2734.0 | |
| Blastoid nEND; NACL | LW51 | 2055 | 11767.0 | 3250.0 | |
| Blastoid TSCs; TSM | LW52 | 4486 | 8585.5 | 2581.0 | |
| Count | 4 | 4 | — | — | — |
| Mean | — | — | — | 11,934 | 3,148 |
| Sum | — | — | 10,842 | — | — |
purrr::reduce(list(
p_embedding_stem_batch,
p_embedding_stem_UMI,
p_embedding_stem_POU5F1
), `+`) +
patchwork::plot_layout(ncol = 3) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Polishing
p_embedding_stem_batch <- plot_embedding(
data = embedding_stem[, c(x_column, y_column)],
color = embedding_stem$batch |> as.factor(),
label = NULL,
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding(void = TRUE) +
scale_color_manual(
values = scales::hue_pal()(n = length(unique(embedding_stem$batch))),
labels = c(
"Naïve ES cells",
"Blastoid Naïve ES cells",
"Blastoid nEND",
"Blastoid TSCs"
)
)x_label <- ggplot_build(p_embedding_stem_batch)$layout$panel_params[[1]][c("x.range")] |>
unlist() |>
quantile(0.1)
y_label <- ggplot_build(p_embedding_stem_batch)$layout$panel_params[[1]][c("y.range")] |>
unlist() |>
quantile(0.8)
features_selected <- c(
"ENSG00000181449_SOX2",
"ENSG00000141448_GATA6",
"ENSG00000107485_GATA3"
)
p_embedding_stem_SOX2_GATA6_GATA3 <- purrr::map(features_selected, \(x) {
selected_feature <- x
plot_embedding(
data = embedding_stem[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding_stem$cell])
[selected_feature, ] + 1
),
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = 0.5,
na_value = "grey80"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2],
void = TRUE,
legend_key_size = c(1.5, 1.5)
) +
ggplot2::annotate(
geom = "text",
x = x_label,
y = y_label,
label = stringr::str_c(
x |> stringr::str_remove(pattern = "^E.+_")
),
family = "Arial",
color = "black",
size = 5 / ggplot2::.pt,
hjust = 0,
vjust = 0
# parse = TRUE
)
})list(
p_embedding_stem_batch,
p_embedding_stem_SOX2_GATA6_GATA3
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(nrow = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Trajectory inference
Data loading
MATRIX_DIR <- list(
"github/data/matrices/LW36",
"github/data/matrices/LW49_LW50_LW51_LW52",
"github/data/matrices/LW58_LW59",
"github/data/matrices/LW60_LW61",
"raw/public/PRJEB11202/reformatted_matrix"
)
matrix_readcount_use <- purrr::map(MATRIX_DIR, \(x) {
load_matrix(file.path(PROJECT_DIR, x))
}) |>
purrr::reduce(cbind)
matrix_readcount_use <- matrix_readcount_use[, embedding_timecourse$cell]
# clean up
rm(MATRIX_DIR)Embedding visualization
EMBEDDING_TITLE_PREFIX <- "FIt-SNE"
x_column <- "x_fitsne"
y_column <- "y_fitsne"Clustering & batch & UMI
p_embedding_timecourse_cluster <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = letters[as.integer(embedding_timecourse$louvain + 1)] |>
as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Cluster"),
color_labels = TRUE,
color_legend = FALSE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
p_embedding_timecourse_batch <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = embedding_timecourse$batch |> as.factor(),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding()
CB_POSITION <- c(0.875, 0.3)
p_embedding_timecourse_UMI <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = log10(Matrix::colSums(matrix_readcount_use[, embedding_timecourse$cell])),
label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2]
)
selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_timecourse_POU5F1 <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding_timecourse$cell])
[selected_feature, ] + 1
),
label = glue::glue(
"{EMBEDDING_TITLE_PREFIX}; ",
"{selected_feature |> stringr::str_remove(pattern = \"^E.+_\")}"
),
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE,
na_value = "grey80"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2]
)list(
p_embedding_timecourse_cluster,
p_embedding_timecourse_batch,
p_embedding_timecourse_UMI,
p_embedding_timecourse_POU5F1
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2) +
patchwork::plot_annotation(
theme = ggplot2::theme(plot.margin = ggplot2::margin())
)Polishing
p_embedding_timecourse_batch <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = embedding_timecourse$batch |> as.factor(),
label = NULL,
color_labels = FALSE,
color_legend = TRUE,
sort_values = FALSE,
shuffle_values = TRUE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE
) +
theme_customized_embedding(void = TRUE) +
ggplot2::scale_color_manual(
values = yarrr::piratepal(palette = "google") |> as.character(),
labels = c("D3", "D6", "D9", "D9; PXGL")
)embedding_timecourse <- embedding_timecourse |>
dplyr::select(cell:y_fitsne) |>
dplyr::left_join(
embedding |> dplyr::select(cell, louvain_ = louvain),
by = c("cell" = "cell")
) |>
mutate(
louvain_ = case_when(
is.na(louvain_) ~ "NA",
TRUE ~ as.character(louvain_)
),
#
lineage = case_when(
louvain_ %in% c(11) ~ "EPI",
louvain_ %in% c(18) ~ "HYP",
louvain_ %in% c(0, 1, 8, 9, 12, 17) ~ "TE",
louvain_ %in% c(10, 14) ~ "Pre-lineage",
TRUE ~ "Other"
),
lineage = factor(
lineage,
levels = c("Other", "TE", "Pre-lineage", "HYP", "EPI")
),
#
cluters_selected = case_when(
louvain_ %in% c(2, 3, 4, 5, 6, 7, 13, 15, 16) ~ louvain_,
TRUE ~ "Other"
),
cluters_selected = factor(
cluters_selected,
levels = c("Other", "2", "3", "4", "5", "6", "7", "13", "15", "16")
)
)p_embedding_timecourse_lineage <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = embedding_timecourse$lineage |> as.factor(),
label = NULL,
color_labels = FALSE,
color_legend = TRUE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE,
na_value = "grey70"
) +
ggplot2::scale_color_manual(
name = NULL,
limits = c("TE", "Pre-lineage", "HYP", "EPI"),
values = c("grey70", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
breaks = c("Other", "EPI", "HYP", "TE", "Pre-lineage"),
labels = c("Other", "ELCs", "HLCs", "TLCs", "Pre-lineage-like"),
guide = ggplot2::guides(
color = ggplot2::guide_legend(
override.aes = list(
size = 2, alpha = 1
),
ncol = 1,
reverse = TRUE,
order = 1
)
),
na.value = "grey70"
) +
theme_customized_embedding(void = TRUE)p_embedding_timecourse_cluster <- plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = embedding_timecourse$cluters_selected,
label = NULL,
color_labels = FALSE,
color_legend = TRUE,
sort_values = TRUE,
shuffle_values = FALSE,
rasterise = RASTERISED,
geom_point_size = GEOM_POINT_SIZE,
geom_point_legend_ncol = 2
) +
ggplot2::scale_color_manual(
values = c("Other" = "grey70", color_palette_cluster),
limits = embedding_timecourse |>
dplyr::filter(cluters_selected != "Other") |>
dplyr::pull(cluters_selected) |>
as.character() |>
unique() |>
stringr::str_sort(numeric = TRUE),
na.value = "grey70"
) +
theme_customized_embedding(void = TRUE) +
ggrepel::geom_text_repel(
data = get_middle_points(
data = embedding_timecourse |>
dplyr::filter(cluters_selected != "Other"),
x = x_column,
y = y_column,
group = "cluters_selected"
),
ggplot2::aes(
x = x_fitsne,
y = y_fitsne,
label = cluters_selected
),
color = "black",
size = 1.8,
family = "Arial",
#
box.padding = 0.4,
point.padding = 1e-06,
min.segment.length = 0,
arrow = ggplot2::arrow(length = unit(0.015, "npc")),
max.overlaps = Inf,
nudge_x = 0,
nudge_y = 10,
#
segment.color = "grey35",
segment.size = 0.25,
segment.alpha = 1,
# segment.inflect = TRUE,
seed = 20201121
)features_selected <- c(
"ENSG00000181449_SOX2",
"ENSG00000141448_GATA6",
"ENSG00000107485_GATA3"
)
p_embedding_timecourse_SOX2_GATA6_GATA3 <- purrr::map(features_selected, \(x) {
selected_feature <- x
plot_embedding(
data = embedding_timecourse[, c(x_column, y_column)],
color = log10(
calc_cpm(matrix_readcount_use[, embedding_timecourse$cell])
[selected_feature, ] + 1
),
label = NULL,
color_legend = TRUE,
sort_values = TRUE,
rasterise = RASTERISED,
geom_point_size = 0.5,
na_value = "grey80"
) +
theme_customized_embedding(
x = CB_POSITION[1],
y = CB_POSITION[2],
void = TRUE,
legend_key_size = c(1.5, 1.5)
) +
ggplot2::annotate(
geom = "text",
x = x_label,
y = y_label,
label = stringr::str_c(
x |> stringr::str_remove(pattern = "^E.+_")
),
family = "Arial",
color = "black",
size = 5 / ggplot2::.pt,
hjust = 0.5,
vjust = 0
# parse = TRUE
)
})c(
list(
p_embedding_timecourse_batch,
p_embedding_timecourse_lineage,
p_embedding_timecourse_cluster
),
p_embedding_timecourse_SOX2_GATA6_GATA3
) |>
purrr::reduce(`+`) +
patchwork::plot_layout(ncol = 2, byrow = FALSE) +
patchwork::plot_annotation(
theme = theme(plot.margin = margin())
)Cluster composition
calc_group_composition(
data = embedding_timecourse,
x = "louvain",
group = "batch"
) |>
dplyr::mutate(
louvain = as.factor(louvain)
) |>
plot_barplot(
x = "louvain",
y = "percentage",
z = "batch"
) +
ggplot2::scale_fill_manual(
values = yarrr::piratepal(palette = "google") %>% as.character(),
labels = c("D3", "D6", "D9", "D9; PXGL")
) +
ggplot2::guides(fill = ggplot2::guide_legend(direction = "horizontal")) +
ggplot2::theme(legend.position = "bottom")R session info
devtools::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.1.2 (2021-11-01)
os macOS Monterey 12.2
system aarch64, darwin20.6.0
ui unknown
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Chicago
date 2022-01-30
pandoc 2.17.0.1 @ /opt/homebrew/bin/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.1)
backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.2)
BayesFactor 0.9.12-4.3 2021-12-13 [1] CRAN (R 4.1.2)
BiocGenerics 0.40.0 2021-10-26 [1] Bioconductor
bit 4.0.4 2020-08-04 [1] CRAN (R 4.1.1)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.1)
brio 1.1.3 2021-11-30 [1] CRAN (R 4.1.2)
broom 0.7.11 2022-01-03 [1] CRAN (R 4.1.2)
cachem 1.0.6 2021-08-19 [1] CRAN (R 4.1.1)
callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.1)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.1)
checkmate 2.0.0 2020-02-06 [1] CRAN (R 4.1.1)
circlize 0.4.13 2021-06-09 [1] CRAN (R 4.1.1)
cli 3.1.1 2022-01-20 [1] CRAN (R 4.1.2)
clue 0.3-60 2021-10-11 [1] CRAN (R 4.1.1)
cluster 2.1.2 2021-04-17 [2] CRAN (R 4.1.2)
coda 0.19-4 2020-09-30 [1] CRAN (R 4.1.1)
codetools 0.2-18 2020-11-04 [2] CRAN (R 4.1.2)
colorspace 2.0-2 2021-06-24 [1] CRAN (R 4.1.1)
commonmark 1.7 2018-12-01 [1] CRAN (R 4.1.1)
ComplexHeatmap 2.10.0 2021-10-26 [1] Bioconductor
crayon 1.4.2 2021-10-29 [1] CRAN (R 4.1.1)
data.table 1.14.2 2021-09-27 [1] CRAN (R 4.1.1)
DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.2)
dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.1)
desc 1.4.0 2021-09-28 [1] CRAN (R 4.1.1)
devtools 2.4.3.9000 2022-01-27 [1] Github (r-lib/devtools@41280ac)
digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.2)
doParallel 1.0.16 2020-10-16 [1] CRAN (R 4.1.1)
dplyr * 1.0.7.9000 2022-01-27 [1] Github (tidyverse/dplyr@0501335)
dtplyr 1.2.1.9000 2022-01-27 [1] Github (tidyverse/dtplyr@8542cf1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.1)
evaluate 0.14 2019-05-28 [1] CRAN (R 4.1.1)
extrafont * 0.17 2014-12-08 [1] CRAN (R 4.1.1)
extrafontdb 1.0 2012-06-11 [1] CRAN (R 4.1.1)
fansi 1.0.2 2022-01-14 [1] CRAN (R 4.1.2)
farver 2.1.0 2021-02-28 [1] CRAN (R 4.1.1)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.1)
forcats * 0.5.1.9000 2022-01-27 [1] Github (tidyverse/forcats@b4dade0)
foreach 1.5.1 2020-10-15 [1] CRAN (R 4.1.1)
fs 1.5.2.9000 2022-01-27 [1] Github (r-lib/fs@6d1182f)
gargle 1.2.0 2021-07-02 [1] CRAN (R 4.1.1)
generics 0.1.1 2021-10-25 [1] CRAN (R 4.1.1)
GetoptLong 1.0.5 2020-12-15 [1] CRAN (R 4.1.1)
ggplot2 * 3.3.5.9000 2022-01-27 [1] Github (tidyverse/ggplot2@c89c265)
ggrepel 0.9.1 2021-01-15 [1] CRAN (R 4.1.1)
ggthemes 4.2.4 2021-01-20 [1] CRAN (R 4.1.2)
GlobalOptions 0.1.2 2020-06-10 [1] CRAN (R 4.1.1)
glue 1.6.1.9000 2022-01-27 [1] Github (tidyverse/glue@3da70df)
googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.1.1)
googlesheets4 1.0.0 2021-07-21 [1] CRAN (R 4.1.1)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.1.1)
gt 0.3.1 2021-08-07 [1] CRAN (R 4.1.2)
gtable 0.3.0.9000 2022-01-27 [1] Github (r-lib/gtable@a0bd272)
gtools 3.9.2 2021-06-06 [1] CRAN (R 4.1.1)
haven 2.4.3 2021-08-04 [1] CRAN (R 4.1.1)
hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.1)
htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.1)
htmlwidgets 1.5.4 2021-09-08 [1] CRAN (R 4.1.1)
httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.1)
IRanges 2.28.0 2021-10-26 [1] Bioconductor
iterators 1.0.13 2020-10-15 [1] CRAN (R 4.1.1)
jpeg 0.1-9 2021-07-24 [1] CRAN (R 4.1.1)
jsonlite 1.7.3 2022-01-17 [1] CRAN (R 4.1.2)
knitr 1.37 2021-12-16 [1] CRAN (R 4.1.2)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.1)
lattice 0.20-45 2021-09-22 [2] CRAN (R 4.1.2)
lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.1)
lubridate 1.8.0 2022-01-27 [1] Github (tidyverse/lubridate@566590f)
magrittr 2.0.2 2022-01-26 [1] CRAN (R 4.1.2)
Matrix * 1.4-0 2021-12-08 [2] CRAN (R 4.1.2)
MatrixModels 0.5-0 2021-03-02 [1] CRAN (R 4.1.1)
matrixStats 0.61.0 2021-09-17 [1] CRAN (R 4.1.1)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.1.2)
modelr 0.1.8.9000 2022-01-27 [1] Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.1)
mvtnorm 1.1-3 2021-10-08 [1] CRAN (R 4.1.1)
patchwork * 1.1.0.9000 2022-01-27 [1] Github (thomasp85/patchwork@79223d3)
pbapply 1.5-0 2021-09-16 [1] CRAN (R 4.1.1)
pillar 1.6.5 2022-01-25 [1] CRAN (R 4.1.2)
pkgbuild 1.3.1 2021-12-20 [1] CRAN (R 4.1.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.1)
pkgload 1.2.4 2021-11-30 [1] CRAN (R 4.1.2)
png 0.1-7 2013-12-03 [1] CRAN (R 4.1.1)
prettyunits 1.1.1.9000 2022-01-27 [1] Github (r-lib/prettyunits@8706d89)
processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.1)
ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.1)
purrr * 0.3.4.9000 2022-01-27 [1] Github (tidyverse/purrr@5aca9df)
R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.1.1)
R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.1.1)
R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.1.1)
R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.1.1)
R6 2.5.1.9000 2022-01-27 [1] Github (r-lib/R6@1b05b89)
ragg 1.2.1.9000 2022-01-27 [1] Github (r-lib/ragg@c68c666)
RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 4.1.1)
Rcpp 1.0.8 2022-01-13 [1] CRAN (R 4.1.2)
readr * 2.1.1.9000 2022-01-27 [1] Github (tidyverse/readr@718c42c)
readxl 1.3.1.9000 2022-01-27 [1] Github (tidyverse/readxl@2ccb82c)
remotes 2.4.2 2022-01-27 [1] Github (r-lib/remotes@9355549)
reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.1)
reticulate 1.24 2022-01-26 [1] CRAN (R 4.1.2)
rjson 0.2.21 2022-01-09 [1] CRAN (R 4.1.2)
rlang 1.0.0.9000 2022-01-29 [1] Github (r-lib/rlang@6e9ef5e)
rmarkdown 2.11.12 2022-01-27 [1] Github (rstudio/rmarkdown@b53a7ce)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.1)
rstudioapi 0.13.0-9000 2022-01-27 [1] Github (rstudio/rstudioapi@5d0f087)
Rttf2pt1 1.3.9 2021-07-22 [1] CRAN (R 4.1.1)
rvest 1.0.2 2021-10-16 [1] CRAN (R 4.1.1)
S4Vectors 0.32.3 2021-11-21 [1] Bioconductor
sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.1)
scales 1.1.1.9000 2022-01-29 [1] Github (r-lib/scales@13b01f0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.2)
shape 1.4.6 2021-05-19 [1] CRAN (R 4.1.1)
stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.2)
stringr * 1.4.0.9000 2022-01-27 [1] Github (tidyverse/stringr@85f6140)
styler * 1.6.2.9000 2022-01-27 [1] Github (r-lib/styler@9274aed)
systemfonts 1.0.3 2021-10-13 [1] CRAN (R 4.1.2)
testthat 3.1.2.9000 2022-01-27 [1] Github (r-lib/testthat@54b9db2)
textshaping 0.3.6 2021-10-13 [1] CRAN (R 4.1.1)
tibble * 3.1.6.9001 2022-01-27 [1] Github (tidyverse/tibble@64fefd5)
tidyr * 1.1.4.9000 2022-01-27 [1] Github (tidyverse/tidyr@d46d22a)
tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.1.1)
tidyverse * 1.3.1.9000 2022-01-27 [1] Github (tidyverse/tidyverse@6186fbf)
tzdb 0.2.0 2021-10-27 [1] CRAN (R 4.1.1)
usethis 2.1.5.9000 2022-01-27 [1] Github (r-lib/usethis@57b109a)
utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.1)
vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.1)
viridis 0.6.2 2021-10-13 [1] CRAN (R 4.1.1)
viridisLite 0.4.0 2021-04-13 [1] CRAN (R 4.1.1)
vroom 1.5.7.9000 2022-01-27 [1] Github (r-lib/vroom@59261e3)
withr 2.4.3 2021-11-30 [1] CRAN (R 4.1.2)
xfun 0.29 2021-12-14 [1] CRAN (R 4.1.2)
xml2 1.3.3 2021-11-30 [1] CRAN (R 4.1.2)
yaml 2.2.2 2022-01-25 [1] CRAN (R 4.1.2)
yarrr 0.1.5 2017-04-19 [1] CRAN (R 4.1.2)
[1] /opt/homebrew/lib/R/4.1/site-library
[2] /opt/homebrew/Cellar/r/4.1.2/lib/R/library
─ Python configuration ───────────────────────────────────────────────────────
python: /Users/jialei/.pyenv/shims/python
libpython: /Users/jialei/.pyenv/versions/miniforge3-4.10.1-5/lib/libpython3.9.dylib
pythonhome: /Users/jialei/.pyenv/versions/miniforge3-4.10.1-5:/Users/jialei/.pyenv/versions/miniforge3-4.10.1-5
version: 3.9.5 | packaged by conda-forge | (default, Jun 19 2021, 00:24:55) [Clang 11.1.0 ]
numpy: /Users/jialei/.pyenv/versions/miniforge3-4.10.1-5/lib/python3.9/site-packages/numpy
numpy_version: 1.20.3
numpy: /Users/jialei/.pyenv/versions/miniforge3-4.10.1-5/lib/python3.9/site-packages/numpy
NOTE: Python version was forced by RETICULATE_PYTHON
──────────────────────────────────────────────────────────────────────────────